import json
import os
import re

import requests

# URL template for the listing pages; ``{}`` is replaced by the page number.
basic_url = 'https://www.gushiwen.org/default_{}.aspx'

# Headers sent with every request: a desktop Chrome user-agent string.
headers = {
    'user-agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/71.0.3578.98 Safari/537.36'
    ),
}

# Module-wide accumulator: one dict per poem, appended to by ``parse``.
poems = []

def parse(url):
    """Fetch one listing page and append the poems found on it to ``poems``.

    The page is downloaded with the module-level ``headers`` and scanned with
    regular expressions for four parallel lists: titles, the text of the
    first and second links inside each ``<p class="source">`` paragraph, and
    the poem bodies (with all HTML tags stripped).  The lists are combined
    with ``zip``, so if the patterns match a different number of times the
    result is truncated to the shortest list.

    After the page has been processed, the whole accumulated ``poems`` list
    is written out again via ``print_poem``.

    Args:
        url: Address of the listing page to download.

    Raises:
        requests.exceptions.RequestException: If the download fails,
            including when the server does not answer within the timeout.
    """
    # Without a timeout a stalled server would block the crawl forever.
    response = requests.get(url, headers=headers, timeout=10)
    # Let requests guess the encoding from the body rather than the headers.
    response.encoding = response.apparent_encoding
    text = response.text

    titles = re.findall(r'<div class="cont">.*?<b>(.*?)</b>', text, re.DOTALL)
    dynasties = re.findall(
        r'<p class="source">.*?<a .*? target="_blank">(.*?)</a>',
        text,
        re.DOTALL,
    )
    authors = re.findall(
        r'<p class="source">.*?<a .*? target="_blank">.*?</a>.*?<a .*? target="_blank">(.*?)</a>',
        text,
        re.DOTALL,
    )
    bodies = re.findall(
        r'<div class="contson" id=.*?>(.*?)</div>', text, re.DOTALL
    )
    # Drop every remaining tag from the poem body and trim the whitespace.
    contents = [re.sub(r'<.*?>', '', body).strip() for body in bodies]

    # Distinct loop names: the original rebound the list being zipped.
    for title, dynasty, author, content in zip(
        titles, dynasties, authors, contents
    ):
        poems.append({
            'title': title,
            # Key spelling is kept because print_poem reads 'dynasity'.
            'dynasity': dynasty,
            'author': author,
            'content': content,
        })
    print_poem(poems)

def print_poem(poems):
    """Write *poems* to ``collection/gushi.txt``, replacing any old content.

    The file starts with ``{`` and a newline, then holds one group per poem
    (``{``, newline, title, dynasty, author and content each followed by a
    newline, then ``}``), and ends with a closing ``}``.

    Args:
        poems: Iterable of dicts with the string keys ``'title'``,
            ``'dynasity'``, ``'author'`` and ``'content'``.

    Raises:
        KeyError: If a poem lacks one of the expected keys.
        OSError: If the directory or file cannot be created or written.
    """
    path = 'collection/gushi.txt'
    # The relative output directory may not exist yet on a fresh run.
    os.makedirs(os.path.dirname(path), exist_ok=True)

    pieces = ['{\n']
    for poem in poems:
        pieces.append('{\n')
        pieces.append(
            poem['title'] + '\n'
            + poem['dynasity'] + '\n'
            + poem['author'] + '\n'
            + poem['content'] + '\n'
        )
        pieces.append('}')
    pieces.append('}')

    # A single 'w' open truncates and writes; no separate truncate pass.
    with open(path, 'w', encoding='utf-8') as f:
        f.write(''.join(pieces))

def print_json(records=None):
    """Write the collected poems to ``collection/gushi.json`` as a JSON array.

    The previous output wrapped one JSON object per line in a bare ``{ }``
    pair, which no JSON parser accepts.  The file now holds a single array
    of poem objects, so ``json.load`` can read it back.

    Args:
        records: Poem dicts to serialise.  Defaults to the module-level
            ``poems`` list when omitted.

    Raises:
        OSError: If the directory or file cannot be created or written.
    """
    if records is None:
        records = poems
    path = 'collection/gushi.json'
    # The relative output directory may not exist yet on a fresh run.
    os.makedirs(os.path.dirname(path), exist_ok=True)
    with open(path, 'w', encoding='utf-8') as f:
        # ensure_ascii=False keeps non-ASCII text readable in the file.
        json.dump(records, f, ensure_ascii=False, indent=2)
        f.write('\n')
def main(num=5):
    """Crawl the listing pages and then write the JSON dump.

    Pages are numbered from 1 up to, but not including, *num*, so the
    default of 5 visits pages 1 to 4.  Each page URL is built from
    ``basic_url`` and handed to ``parse``; ``print_json`` runs once at the
    end.
    """
    # Lazy generator: each URL is formatted right before it is parsed.
    page_urls = (basic_url.format(str(page)) for page in range(1, num))
    for page_url in page_urls:
        parse(page_url)
    print_json()

# Run the crawl only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
